#####################################################################################
##### Gov 2001 Replication                   ########################################
#### Table 1, Figure 4, Table    [Survival]  ########################################
#####################################################################################

require(stargazer)
require(foreign)
require(gridExtra)
library(ggplot2)
library(survival)
library(readstata13)
library(xtable)
require(MASS)
require(plm)
require(DataCombine)
library(mvtnorm)
library(zoo)
library(OutlierDC)
library(reshape2)

rm(list = ls())

### Change this to your directory
setwd("/Users/andrewleber/Documents/Harvard Study/2016 Spring/GOV 2001/Final Write-Up/Data - Final")
table <- read.csv("HRV_Survival_Data.csv", check.names = FALSE) 

#####################################################################################
### Replication of Cox Hazard Models
### Table 1 (p. 776), Table 2 (p. 778)
#####################################################################################

### Create Survival model using Survival package in R, Surv Object
### Starting, ending times for survival model are saved in Stata data under
### '_t0', '_t' columns. The failure "event" is $failrevolt - i.e. when a 
### regime falls due to revolt from below,

survival<-Surv(as.numeric(table$`_t0`), as.numeric(table$`_t`), table$failrevolt)

#######################################################################################
# We use the coxph() function in survival to estimate a Cox proportional hazards model.
# Utilizes covariates from data set, clusters by regime in question. 
#######################################################################################

######################################################################################
# Conditional on Past Collapse
#  - Replicate Model 8 from HRV
######################################################################################

### Table 1, Model 8

m8<- coxph(survival ~ lag_transparency + 
             lag_growth + lag_trans_growth + 
             lag_party +  lag_mil + ever_collapse
           + cluster(AutocRegimeID), data=table)
m8

###########################################################################################
# Figure 4 - Hazard Rates
# This figure presents instantaneous hazard rates for a given regime in year t (~10 to 40)
# We are still seeking to replicate this exact figure in R
# Different packages - such as the survfit() function - can provide us with the *cumulative*
# hazard rates regimes face, but we have struggled to calculate the baseline hazard rate 
# an autocratic regime faces at time t of failing at time t+1
###########################################################################################

### Store the low/high transparency percentiles according to 
### Figure 4 specifications
transhigh <- quantile(table$transparencyindex, 0.9)
translow <- quantile(table$transparencyindex, 0.1)

### Store the low/high growth percentiles according to
### Figure 4 specifications
growthhigh <- quantile(table$grgdpch, 0.9, na.rm=T)
growthlow <- quantile(table$grgdpch, 0.1, na.rm=T)

### Generate interaction terms
lowlowint <- as.numeric(translow*growthlow)
lowhighint <- as.numeric(translow*growthhigh)
highlowint <- as.numeric(transhigh*growthlow)
highhighint <- as.numeric(transhigh*growthhigh)

### Creates data to use to plot LL curve - Low trans, low growth
plotLL <- data.frame(lag_transparency=translow,
                     lag_growth=growthlow,
                     lag_trans_growth=lowlowint,
                     lag_party=0,
                     lag_mil=0,
                     ever_collapse=1)

### Creates data to use to plot LH curve - low trans, high growth
plotLH <- data.frame(lag_transparency=translow,
                     lag_growth=growthhigh,
                     lag_trans_growth=lowhighint,
                     lag_party=0,
                     lag_mil=0,
                     ever_collapse=1)

### Creates data to use to plot HL curve - high trans, low growth
plotHL <- data.frame(lag_transparency=transhigh,
                     lag_growth=growthlow,
                     lag_trans_growth=highlowint,
                     lag_party=0,
                     lag_mil=0,
                     ever_collapse=1)

### Creates data to use to plot HH curve - high trans, high growth
plotHH <- data.frame(lag_transparency=transhigh,
                     lag_growth=growthhigh,
                     lag_trans_growth=highhighint,
                     lag_party=0,
                     lag_mil=0,
                     ever_collapse=1)

# store various curves
curvelist<-list()

### Store curves for our 4 set values
curvelist[[1]] <- survfit(m8, newdata=plotLL)
curvelist[[2]] <- survfit(m8, newdata=plotLH)
curvelist[[3]] <- survfit(m8, newdata=plotHL)
curvelist[[4]] <- survfit(m8, newdata=plotHH)

### Extract the critical times from the surfit models (should be same for each one)
time.temp <- curvelist[[1]]$time
ytemp <- matrix(0, length(time.temp), length(curvelist)*3)

### Store each of these survfit functions as smoothed curves
for (i in 1:length(curvelist)) {
  ss <- smooth.spline(curvelist[[i]]$time, -log(curvelist[[i]]$surv), df=6)  #smoothed cum hazard
  ssUL<- smooth.spline(curvelist[[i]]$time, -log(curvelist[[i]]$upper), df=6)
  ssLL<- smooth.spline(curvelist[[i]]$time, -log(curvelist[[i]]$lower), df=6)
  i<-((i-1)*3)+1
  ytemp[,i] <- predict(ss, deriv=1)$y  #hazard functions
  ytemp[,i+1] <- predict(ssUL, deriv=1)$y
  ytemp[,i+2] <- predict(ssLL, deriv=1)$y
}

### Plot outcomes with high/low transprency
pdf("Figure_4.pdf", width=9, height=5)
par(oma=c(0,0,2,0))
par(mfrow=c(1,2))
matplot(time.temp, ytemp[,1], type='l', lty = 1, col="red",
        ylab = "Hazard Rate", xlab = "Years",
        main = "Low Transparency",
        xlim = c(7, 45), ylim = c(0, 0.06))
#matlines(time.temp, ytemp[,2:3], type='l', lty = 2, col="red", xlim = c(7, 45), ylim = c(0, 0.06))
matlines(time.temp, ytemp[,4], type='l', lty = 1, col="blue", xlim = c(7, 45), ylim = c(0, 0.06))
#matlines(time.temp, ytemp[,5:6], type='l', lty = 2, col="blue", xlim = c(7, 45), ylim = c(0, 0.06))
text(12, 0.005, "High Growth", cex = .8, col = "blue")
text(12, 0.025, "Low Growth", cex = .8, col = "red")

matplot(time.temp, ytemp[,7], type='l', lty = 1, col="red", 
        ylab = "Hazard Rate", xlab = "Years",
        main = "High Transparency",
        xlim = c(7, 45), ylim = c(0, 0.06))
#matlines(time.temp, ytemp[,8:9], type='l', lty = 2, col="red", xlim = c(7, 45), ylim = c(0, 0.06))
matlines(time.temp, ytemp[,10], type='l', lty = 1, col="blue", xlim = c(7, 45), ylim = c(0, 0.06))
#matlines(time.temp, ytemp[,11:12], type='l', lty = 2, col="blue", xlim = c(7, 45), ylim = c(0, 0.06))
text(12, 0.012, "High Growth", cex = .8, col = "blue")
text(17, 0.045, "Low Growth", cex = .8, col = "red")
title("Hazard Ratio - Transparency and Growth", outer=T, cex = 2, font = 2)
dev.off()

###########################################################################################
# Confidence Intervals on Hazard Rates
# This figure presents 95% confidence intervals from the Hazard Ratios in Figure 4
###########################################################################################
vcov<-vcov(m8)
beta<-m8$coefficients

set.seed(8766)
nIter<-10000

### Simulate 10,000 Beta sets
simBetas.HRV<-rmvnorm(n = 10000, 
                      mean = beta, 
                      sigma = vcov)

# Relative risk of higher transparency - low growth
diff<-rep(NA, nIter)

# Simulate values of the hazard ratio between hi/low transparency regimes
for(i in 1:nIter){
  XiB<-as.matrix(plotHL)%*%as.matrix(simBetas.HRV[i,])-
    as.matrix(plotLL)%*%as.matrix(simBetas.HRV[i,])
  diff[i]<-XiB
}

quantile(diff, 0.975) # 6.99
quantile(diff, 0.025) # 0.745


# Relative risk of higher transparency - High Growth
diff2<-rep(NA, nIter)

# Simulate values of the hazard ratio between hi/low transparency regimes
for(i in 1:nIter){
  XiB<-as.matrix(plotHH)%*%as.matrix(simBetas.HRV[i,])-
    as.matrix(plotLH)%*%as.matrix(simBetas.HRV[i,])
  diff2[i]<-XiB
}

### Upper and lower bounds for 95% CI - crosses 1
quantile(diff2, 0.975)
quantile(diff2, 0.025)


# Relative risk of higher growth - low transparency
diff3<-rep(NA, nIter)

# Simulate values of the hazard ratio between hi/low growth regimes
for(i in 1:nIter){
  XiB<-as.matrix(plotLH)%*%as.matrix(simBetas.HRV[i,])-
    as.matrix(plotLL)%*%as.matrix(simBetas.HRV[i,])
  diff3[i]<-XiB
}

quantile(diff3, 0.975) 
quantile(diff3, 0.025) 

# Relative risk of higher growth - high transparency
diff4<-rep(NA, nIter)

# Simulate values of the hazard ratio between hi/low growth regimes
for(i in 1:nIter){
  XiB<-as.matrix(plotHH)%*%as.matrix(simBetas.HRV[i,])-
    as.matrix(plotHL)%*%as.matrix(simBetas.HRV[i,])
  diff4[i]<-XiB
}

### Upper and lower bounds for 95% CI - crosses 1
quantile(diff4, 0.975)
quantile(diff4, 0.025)

###### Graphical plot of hazard ratios, confidence intervals
###### Graphed adjusted to depict a log scale on the Y axis
pdf("Figure_4_CI.pdf", width = 9, height = 5)
par(oma=c(0,0,2,0))
par(mfrow=c(1,2))
plot(x=c(1.5, 2.5), y=c(mean(diff), mean(diff2)), ylim = c(log(0.1), log(10)), xlim = c(1,3),
     main = c("High vs. Low Transparency"), axes=F,
     xlab = "", ylab = "Hazard Ratio", lwd = 1)

ticks <- c(0.1, 0.5, 1, 2, 10)
labels <- sapply(ticks, function(i) as.expression(i))
axis(2, at = log(ticks), labels=labels)
axis(1, at = c(1.5,2.5), labels = c("Low Growth", "High Growth"))
segments(x0 = c(1.5, 2.5),
         y0 = c(quantile(diff, 0.025),quantile(diff2, 0.025)),
         y1 = c(quantile(diff, 0.975),quantile(diff2, 0.975)), lwd = 1)
abline(h = 0, lty = 3, lwd = 2)

plot(x=c(1.5, 2.5), y=c(mean(diff3), mean(diff4)), ylim = c(log(0.1), log(10)), xlim = c(1,3),
     main = c("High vs. Low Growth"), axes=F,
     xlab = "", ylab = "Hazard Ratio", lwd = 1)
ticks <- c(0.1, 0.5, 1, 2, 10)
labels <- sapply(ticks, function(i) as.expression(i))
axis(2, at = log(ticks), labels=labels)
axis(1, at = c(1.5,2.5), labels = c("Low Transparency", "High Transparency"))
segments(x0 = c(1.5, 2.5),
         y0 = c(quantile(diff3, 0.025),quantile(diff4, 0.025)),
         y1 = c(quantile(diff3, 0.975),quantile(diff4, 0.975)), lwd = 1)
abline(h = 0, lty = 3, lwd = 2)
title("Hazard Ratios - Transparency and Growth", outer=T, cex = 2, font = 2)
dev.off()


###########################################################################################
# Competing measures of "openness" in society
###########################################################################################
### Adding press freedom
m8pf<- coxph(survival ~ lag_transparency + lag_pf +
               lag_growth + lag_trans_growth + 
               lag_party +  lag_mil + ever_collapse
             + cluster(AutocRegimeID), data=table)
m8pf

### Adding media access

m8ma<- coxph(survival ~ lag_transparency + v2meaccess +
               lag_growth + lag_trans_growth + 
               lag_party +  lag_mil + ever_collapse
             + cluster(AutocRegimeID), data=table)
m8ma

stargazer(m8pf, m8ma)

################################################################################
################################################################################
## Media access
################################################################################
################################################################################
## Generate survival model with media access
lag_access_growth <- table$v2meaccess*table$lag_growth

m8k<- coxph(survival ~  v2meaccess + lag_growth + lag_access_growth +
              lag_party +  lag_mil + ever_collapse + 
              + cluster(AutocRegimeID), data=table)
m8k


acchigh <- quantile(table$v2meaccess, 0.9, na.rm=T)
acclow <- quantile(table$v2meaccess, 0.1, na.rm=T)

### Store the low/high growth percentiles according to
### Figure 4 specifications
growthhigh <- quantile(table$grgdpch, 0.9, na.rm=T)
growthlow <- quantile(table$grgdpch, 0.1, na.rm=T)

### Generate interaction terms
lowlowint <- as.numeric(acclow*growthlow)
lowhighint <- as.numeric(acclow*growthhigh)
highlowint <- as.numeric(acchigh*growthlow)
highhighint <- as.numeric(acchigh*growthhigh)

### Creates data to use to plot LL curve - Low trans, low growth
accLL <- data.frame(v2meaccess=acclow,
                    lag_growth=growthlow,
                    lag_access_growth=lowlowint,
                    lag_party=0,
                    lag_mil=0,
                    ever_collapse=1)

### Creates data to use to plot LH curve - low trans, high growth
accLH <- data.frame(v2meaccess=acclow,
                    lag_growth=growthhigh,
                    lag_access_growth=lowhighint,
                    lag_party=0,
                    lag_mil=0,
                    ever_collapse=1)

### Creates data to use to plot HL curve - high trans, low growth
accHL <- data.frame(v2meaccess=acchigh,
                    lag_growth=growthlow,
                    lag_access_growth=highlowint,
                    lag_party=0,
                    lag_mil=0,
                    ever_collapse=1)

### Creates data to use to plot HH curve - high trans, high growth
accHH <- data.frame(v2meaccess=acchigh,
                    lag_growth=growthhigh,
                    lag_access_growth=highhighint,
                    lag_party=0,
                    lag_mil=0,
                    ever_collapse=1)

# store various curves
curve.acc<-list()

### Store curves for our 4 set values
curve.acc[[1]] <- survfit(m8k, newdata=accLL)
curve.acc[[2]] <- survfit(m8k, newdata=accLH)
curve.acc[[3]] <- survfit(m8k, newdata=accHL)
curve.acc[[4]] <- survfit(m8k, newdata=accHH)

### Extract the critical times from the surfit models (should be same for each one)
time.temp.acc <- curve.acc[[1]]$time
ytemp.acc <- matrix(0, length(time.temp.acc), length(curve.acc)*3)

### Store each of these survfit functions as smoothed curves
for (i in 1:length(curve.acc)) {
  ss <- smooth.spline(curve.acc[[i]]$time, -log(curve.acc[[i]]$surv), df=6)  #smoothed cum hazard
  ssUL<- smooth.spline(curve.acc[[i]]$time, -log(curve.acc[[i]]$upper), df=6)
  ssLL<- smooth.spline(curve.acc[[i]]$time, -log(curve.acc[[i]]$lower), df=6)
  i<-((i-1)*3)+1
  ytemp.acc[,i] <- predict(ss, deriv=1)$y  #hazard functions
  ytemp.acc[,i+1] <- predict(ssUL, deriv=1)$y
  ytemp.acc[,i+2] <- predict(ssLL, deriv=1)$y
}


###########################################################################################
# Confidence Intervals on Hazard Rates for Media
# This figure presents 95% confidence intervals on the generated hazard rates
###########################################################################################
### Extract coefficients, vcov matrix from media access model
vcov.acc<-vcov(m8k)
beta.acc<-m8k$coefficients

set.seed(8766)
nIter<-10000
diff<-rep(NA, nIter)

### Simulate 10,000 Beta sets
simBetas.acc<-rmvnorm(n = 10000, 
                      mean = beta.acc, 
                      sigma = vcov.acc)

# Generate hazard ratios for low/high media access, low growth
for(i in 1:nIter){
  XiB<-as.matrix(accHL)%*%as.matrix(simBetas.acc[i,])-
    as.matrix(accLL)%*%as.matrix(simBetas.acc[i,])
  diff[i]<-XiB
}

### Upper and lower bounds for 95% CI - crosses 1 (Still crosses one here)
quantile(diff, 0.975) # 0.498
quantile(diff, 0.025) # 0.046

## Lower education --> Lower Risk

diff2<-rep(NA, nIter)

### Generate hazard ratios for Low/High media access, High Growth
for(i in 1:nIter){
  XiB<-as.matrix(accHH)%*%as.matrix(simBetas.acc[i,])-
    as.matrix(accLH)%*%as.matrix(simBetas.acc[i,])
  diff2[i]<-XiB
}

quantile(diff2, 0.975)   # 0.949
quantile(diff2, 0.025)   # 0.019

###### Plot lines for different hazard rates
pdf("Risk_Media_Access.pdf", width = 8, height = 4)
par(oma=c(0,0.5,2,0.5))
layout(matrix(c(1,2,3), 1, 3, byrow = TRUE))
matplot(time.temp.acc, ytemp.acc[,1], type='l', lty = 1, col="red", 
        ylab = "Hazard Rate", xlab = "Years",
        main = "Low Access",
        xlim = c(7, 45), ylim = c(0, 0.15))
#matlines(time.temp.acc, ytemp.acc[,8:9], type='l', lty = 2, col="red", xlim = c(7, 45), ylim = c(0, 0.06))
matlines(time.temp.acc, ytemp.acc[,4], type='l', lty = 1, col="blue", xlim = c(7, 45), ylim = c(0, 0.06))
#matlines(time.temp.acc, ytemp.acc[,11:12], type='l', lty = 2, col="blue", xlim = c(7, 45), ylim = c(0, 0.06))
text(12, 0.025, "High Growth", cex = .8, col = "blue")
text(29, 0.025, "Low Growth", cex = .8, col = "red")

matplot(time.temp.acc, ytemp.acc[,7], type='l', lty = 1, col="red", 
        ylab = "Hazard Rate", xlab = "Years",
        main = "High Access",
        xlim = c(7, 45), ylim = c(0, 0.15))
#matlines(time.temp.acc, ytemp.acc[,8:9], type='l', lty = 2, col="red", xlim = c(7, 45), ylim = c(0, 0.06))
matlines(time.temp.acc, ytemp.acc[,10], type='l', lty = 1, col="blue", xlim = c(7, 45), ylim = c(0, 0.06))
#matlines(time.temp.acc, ytemp.acc[,11:12], type='l', lty = 2, col="blue", xlim = c(7, 45), ylim = c(0, 0.06))
text(12, 0.045, "High Growth", cex = .8, col = "blue")
text(24, 0.11, "Low Growth", cex = .8, col = "red")

# Plot 95% confidence intervals, log scale on the y axis
plot(x=c(1.5,2.5), y=c(mean(diff), mean(diff2)), ylim = c(-0.1, 5.2), xlim = c(1.2,2.8),
     main = c("High vs. Low Media Access"), axes=F,
     xlab = "",  ylab = "Hazard Ratio", lwd = 1)

ticks <- c(1,2, 10, 100, 200)
labels <- sapply(ticks, function(i) as.expression(i))
axis(2, at = log(ticks), labels=labels)

axis(1, at = c(1.5,2.5), labels = c("Low Growth", "High Growth"))
segments(x0 = c(1.5, 2.5),
         y0 = c(quantile(diff, 0.025),quantile(diff2, 0.025)),
         y1 = c(quantile(diff, 0.975),quantile(diff2, 0.975)), lwd = 1)
abline(h = 0, lty = 3)
title("Hazard Ratios - Media Access", outer=T, cex = 2, font = 2)
dev.off()

####################################################################################
####################################################################################

#####################################################################################
#  Education - Varieties of Democracy
#####################################################################################
### Estimate survival model with V-Dem measures of Education
lag_ed_growth <- table$e_peaveduc*table$lag_growth

m8d <- coxph(survival ~ + e_peaveduc + lag_growth +  lag_ed_growth + 
               lag_party +  lag_mil + ever_collapse  +
               cluster(AutocRegimeID), data=table)

m8d

# Store high/low education percentiles
edhigh <- quantile(table$e_peaveduc, 0.9, na.rm=T)
edlow <- quantile(table$e_peaveduc, 0.1, na.rm=T)

### Store the low/high growth percentiles according to
### Figure 4 specifications
growthhigh <- quantile(table$grgdpch, 0.9, na.rm=T)
growthlow <- quantile(table$grgdpch, 0.1, na.rm=T)

### Generate interaction terms
lowlowint <- as.numeric(edlow*growthlow)
lowhighint <- as.numeric(edlow*growthhigh)
highlowint <- as.numeric(edhigh*growthlow)
highhighint <- as.numeric(edhigh*growthhigh)

### Creates data to use to plot LL curve - Low ed, low growth
edLL <- data.frame(e_peaveduc=edlow,
                   lag_growth=growthlow,
                   lag_ed_growth=lowlowint,
                   lag_party=0,
                   lag_mil=0,
                   ever_collapse=1)

### Creates data to use to plot LH curve - low ed, high growth
edLH <- data.frame(e_peaveduc=edlow,
                   lag_growth=growthhigh,
                   lag_ed_growth=lowhighint,
                   lag_party=0,
                   lag_mil=0,
                   ever_collapse=1)

### Creates data to use to plot HL curve - high ed, low growth
edHL <- data.frame(e_peaveduc=edhigh,
                   lag_growth=growthlow,
                   lag_ed_growth=highlowint,
                   lag_party=0,
                   lag_mil=0,
                   ever_collapse=1)

### Creates data to use to plot HH curve - high ed, high growth
edHH <- data.frame(e_peaveduc=edhigh,
                   lag_growth=growthhigh,
                   lag_ed_growth=highhighint,
                   lag_party=0,
                   lag_mil=0,
                   ever_collapse=1)

# store various curves
curve.ed<-list()

### Store curves for our 4 set values
curve.ed[[1]] <- survfit(m8d, newdata=edLL)
curve.ed[[2]] <- survfit(m8d, newdata=edLH)
curve.ed[[3]] <- survfit(m8d, newdata=edHL)
curve.ed[[4]] <- survfit(m8d, newdata=edHH)

### Extract the critical times from the survfit models (should be same for each one)
time.temp.ed <- curve.ed[[1]]$time
ytemp.ed <- matrix(0, length(time.temp.ed), length(curve.ed)*3)

### Store each of these survfit functions as smoothed curves
for (i in 1:length(curve.ed)) {
  ss <- smooth.spline(curve.ed[[i]]$time, -log(curve.ed[[i]]$surv), df=6)  #smoothed cum hazard
  ssUL<- smooth.spline(curve.ed[[i]]$time, -log(curve.ed[[i]]$upper), df=6)
  ssLL<- smooth.spline(curve.ed[[i]]$time, -log(curve.ed[[i]]$lower), df=6)
  i<-((i-1)*3)+1
  ytemp.ed[,i] <- predict(ss, deriv=1)$y  #hazard functions
  ytemp.ed[,i+1] <- predict(ssUL, deriv=1)$y
  ytemp.ed[,i+2] <- predict(ssLL, deriv=1)$y
}

###########################################################################################
# Confidence Intervals on Hazard Rates for Education
###########################################################################################
### Extract covariates, vcov matrix from education Survival model
vcov.edu<-vcov(m8d)
beta.edu<-m8d$coefficients

set.seed(8766)
nIter<-10000
diff<-rep(NA, nIter)

### Simulate 10,000 Beta sets
simBetas.edu<-rmvnorm(n = 10000, 
                      mean = beta.edu, 
                      sigma = vcov.edu)

# Generate simulations for low/high education, low growth
for(i in 1:nIter){
  XiB<-as.matrix(edHL)%*%as.matrix(simBetas.edu[i,])-
    as.matrix(edLL)%*%as.matrix(simBetas.edu[i,])
  diff[i]<-XiB
}

### Upper and lower bounds for 95% CI - crosses 1 (Still crosses one here)
quantile(diff, 0.975)   # 0.616
quantile(diff, 0.025)   # 0.0519
## Lower education --> Lower Risk

diff2<-rep(NA, nIter)

### For Low/High education, High Growth
for(i in 1:nIter){
  XiB<-as.matrix(edHH)%*%as.matrix(simBetas.edu[i,])-
    as.matrix(edLH)%*%as.matrix(simBetas.edu[i,])
  diff2[i]<-XiB
}

### For Hi education, low growth is less risky than higher growth...

### All of this is *definitely* below 1...
### Suggests lower education a much lower risk in hi-growth environments? 
quantile(diff2, 0.975)   # 0.616
quantile(diff2, 0.025)   # 0.0519

###### Graphical plot of different hazard-rate curves
pdf("Risk_of_Education.pdf", width = 8, height = 4)
par(oma=c(0,0.5,2,0.5))
layout(matrix(c(1,2,3), 1, 3, byrow = TRUE))

matplot(time.temp.ed, ytemp.ed[,1], type='l', lty = 1, col="red",
        ylab = "Hazard Rate", xlab = "Years",
        main = "Low Education",
        xlim = c(7, 45), ylim = c(0, 0.06))
#matlines(time.temp.ed, ytemp.ed[,2:3], type='l', lty = 2, col="red", xlim = c(7, 45), ylim = c(0, 0.06))
matlines(time.temp.ed, ytemp.ed[,4], type='l', lty = 1, col="blue", xlim = c(7, 45), ylim = c(0, 0.06))
#matlines(time.temp.ed, ytemp.ed[,5:6], type='l', lty = 2, col="blue", xlim = c(7, 45), ylim = c(0, 0.06))
text(12, 0.000, "High Growth", cex = .8, col = "blue")
text(12, 0.023, "Low Growth", cex = .8, col = "red")
matplot(time.temp.ed, ytemp.ed[,7], type='l', lty = 1, col="red", 
        ylab = "Hazard Rate", xlab = "Years",
        main = "High Education",
        xlim = c(7, 45), ylim = c(0, 0.06))
#matlines(time.temp.ed, ytemp.ed[,8:9], type='l', lty = 2, col="red", xlim = c(7, 45), ylim = c(0, 0.06))
matlines(time.temp.ed, ytemp.ed[,10], type='l', lty = 1, col="blue", xlim = c(7, 45), ylim = c(0, 0.06))
#matlines(time.temp.ed, ytemp.ed[,11:12], type='l', lty = 2, col="blue", xlim = c(7, 45), ylim = c(0, 0.06))
text(12, 0.022, "High Growth", cex = .8, col = "blue")
text(12, 0.041, "Low Growth", cex = .8, col = "red")

# Plot 95% confidence intervals
plot(x=c(1.5, 2.5), y=c(mean(diff), mean(diff2)), ylim = c(-.1, 3.1), xlim = c(1.2,2.8),
     axes=F, main = c("High vs. Low Education"),
     xlab = "", ylab = "Hazard Ratio", lwd = 1)

ticks <- c(1, 2, 10, 25)
labels <- sapply(ticks, function(i) as.expression(i))
axis(2, at = log(ticks), labels=labels)

axis(1, at = c(1.5,2.5), labels = c("Low Growth", "High Growth"))
segments(x0 = c(1.5, 2.5),
         y0 = c(quantile(diff, 0.025),quantile(diff2, 0.025)),
         y1 = c(quantile(diff, 0.975),quantile(diff2, 0.975)), lwd = 1)
abline(h = 0, lty = 3, lwd = 2)
title("Hazard Ratios - Education", outer=T, cex = 2, font = 2)
dev.off()

####################################################################################
####################################################################################
####################################################################################
####################################################################################
# Gini education
lag_edgini_growth <- table$e_peedgini*table$lag_growth

m8j<- coxph(survival ~ e_peedgini  + lag_growth + lag_edgini_growth +
              lag_party +  lag_mil + ever_collapse +  
              cluster(AutocRegimeID), data=table)
m8j

edginihigh <- quantile(table$e_peedgini, 0.9, na.rm=T)
edginilow <- quantile(table$e_peedgini, 0.1, na.rm=T)

### Store the low/high growth percentiles according to
### Figure 4 specifications
growthhigh <- quantile(table$grgdpch, 0.9, na.rm=T)
growthlow <- quantile(table$grgdpch, 0.1, na.rm=T)

### Generate interaction terms
lowlowint <- as.numeric(edginilow*growthlow)
lowhighint <- as.numeric(edginilow*growthhigh)
highlowint <- as.numeric(edginihigh*growthlow)
highhighint <- as.numeric(edginihigh*growthhigh)

### Creates data to use to plot LL curve - Low trans, low growth
giniLL <- data.frame(e_peedgini=edginilow,
                     lag_growth=growthlow,
                     lag_edgini_growth=lowlowint,
                     lag_party=0,
                     lag_mil=0,
                     ever_collapse=1)

### Creates data to use to plot LH curve - low trans, high growth
giniLH <- data.frame(e_peedgini=edginilow,
                     lag_growth=growthhigh,
                     lag_edgini_growth=lowhighint,
                     lag_party=0,
                     lag_mil=0,
                     ever_collapse=1)

### Creates data to use to plot HL curve - high trans, low growth
giniHL <- data.frame(e_peedgini=edginihigh,
                     lag_growth=growthlow,
                     lag_edgini_growth=highlowint,
                     lag_party=0,
                     lag_mil=0,
                     ever_collapse=1)

### Creates data to use to plot HH curve - high trans, high growth
giniHH <- data.frame(e_peedgini=edginihigh,
                     lag_growth=growthhigh,
                     lag_edgini_growth=highhighint,
                     lag_party=0,
                     lag_mil=0,
                     ever_collapse=1)

# store various curves
curve.gini<-list()

### Store curves for our 4 set values
curve.gini[[1]] <- survfit(m8j, newdata=giniLL)
curve.gini[[2]] <- survfit(m8j, newdata=giniLH)
curve.gini[[3]] <- survfit(m8j, newdata=giniHL)
curve.gini[[4]] <- survfit(m8j, newdata=giniHH)

### Extract the critical times from the surfit models (should be same for each one)
time.temp.gini <- curve.gini[[1]]$time
ytemp.gini <- matrix(0, length(time.temp.gini), length(curve.gini)*3)

### Store each of these survfit functions as smoothed curves
for (i in 1:length(curve.gini)) {
  ss <- smooth.spline(curve.gini[[i]]$time, -log(curve.gini[[i]]$surv), df=6)  #smoothed cum hazard
  ssUL<- smooth.spline(curve.gini[[i]]$time, -log(curve.gini[[i]]$upper), df=6)
  ssLL<- smooth.spline(curve.gini[[i]]$time, -log(curve.gini[[i]]$lower), df=6)
  i<-((i-1)*3)+1
  ytemp.gini[,i] <- predict(ss, deriv=1)$y  #hazard functions
  ytemp.gini[,i+1] <- predict(ssUL, deriv=1)$y
  ytemp.gini[,i+2] <- predict(ssLL, deriv=1)$y
}

###########################################################################################
# Confidence Intervals on Hazard Rates for Education Gini
# This  presents 95% confidence intervals on the generated hazard rates
###########################################################################################
# Extract coefficients, vcov matrix for education gini
vcov.gini<-vcov(m8j)
beta.gini<-m8j$coefficients

set.seed(8766)
nIter<-10000
diff<-rep(NA, nIter)

### Simulate 10,000 Beta sets
simBetas.gini<-rmvnorm(n = 10000, 
                       mean = beta.gini, 
                       sigma = vcov.gini)

# Generate simulations for low/high ed gini, low growth
for(i in 1:nIter){
  XiB<-as.matrix(giniLL)%*%as.matrix(simBetas.gini[i,])-
    as.matrix(giniHL)%*%as.matrix(simBetas.gini[i,])
  diff[i]<-XiB
}

diff2<-rep(NA, nIter)

### For Low/High ed gini, High Growth
for(i in 1:nIter){
  XiB<-as.matrix(giniLH)%*%as.matrix(simBetas.gini[i,])-
    as.matrix(giniHH)%*%as.matrix(simBetas.gini[i,])
  diff2[i]<-XiB
}

### All of this is *definitely* below 1...
### Suggests lower education a much lower risk in hi-growth environments? 
quantile(diff2, 0.975)   # 15.5
quantile(diff2, 0.025)   # 0.405

###### Graphical plot of different hazard curves
pdf("Risk_Education_Gini.pdf", width = 9, height = 4.5)
par(oma=c(0,0.5,2,0.5))
layout(matrix(c(1,2,3), 1, 3, byrow = TRUE))

matplot(time.temp.gini, ytemp.gini[,1], type='l', lty = 1, col="red", 
        ylab = "Hazard Rate", xlab = "Years",
        main = "Low Educational Gini",
        xlim = c(7, 45), ylim = c(0, 0.15))
#matlines(time.temp.gini, ytemp.gini[,2:3], type='l', lty = 2, col="red", xlim = c(7, 45), ylim = c(0, 0.06))
matlines(time.temp.gini, ytemp.gini[,4], type='l', lty = 1, col="blue", xlim = c(7, 45), ylim = c(0, 0.06))
#matlines(time.temp.gini, ytemp.gini[,5:6], type='l', lty = 2, col="blue", xlim = c(7, 45), ylim = c(0, 0.06))
text(12, 0.040, "High Growth", cex = .8, col = "blue")
text(20, 0.11, "Low Growth", cex = .8, col = "red")

matplot(time.temp.gini, ytemp.gini[,7], type='l', lty = 1, col="red", 
        ylab = "Hazard Rate", xlab = "Years",
        main = "High Educational Gini",
        xlim = c(7, 45), ylim = c(0, 0.15))
#matlines(time.temp.gini, ytemp.gini[,8:9], type='l', lty = 2, col="red", xlim = c(7, 45), ylim = c(0, 0.06))
matlines(time.temp.gini, ytemp.gini[,10], type='l', lty = 1, col="blue", xlim = c(7, 45), ylim = c(0, 0.06))
#matlines(time.temp.gini, ytemp.gini[,11:12], type='l', lty = 2, col="blue", xlim = c(7, 45), ylim = c(0, 0.06))
text(12, 0.025, "High Growth", cex = .8, col = "blue")
text(29, 0.025, "Low Growth", cex = .8, col = "red")

# Plot of 95% confidence intervals
plot(x=c(1.5,2.5), y=c(mean(diff), mean(diff2)), ylim = c(log(0.1), log(50)), xlim = c(1.2,2.8),
     main = c("Low vs. High Ed Gini"), axes=F,
     xlab = "",  ylab = "Hazard Ratio", lwd = 1)

ticks <- c(0.1, 0.5, 1, 2, 10, 50)
labels <- sapply(ticks, function(i) as.expression(i))
axis(2, at = log(ticks), labels=labels)

axis(1, at = c(1.5,2.5), labels = c("Low Growth", "High Growth"))
segments(x0 = c(1.5, 2.5),
         y0 = c(quantile(diff, 0.025),quantile(diff2, 0.025)),
         y1 = c(quantile(diff, 0.975),quantile(diff2, 0.975)), lwd = 1)
abline(h = 0, lty = 3)

title("Hazard Ratios - Educational Gini", outer=T, cex = 2, font = 2)
dev.off()


####################################################################################
####################################################################################
### Polyarchy
####################################################################################
####################################################################################
### Polyarchy
lag_pol_growth <- table$v2x_polyarchy*table$lag_growth

m8p<- coxph(survival ~ v2x_polyarchy + lag_growth + lag_pol_growth +
              lag_party +  lag_mil + ever_collapse +  
              cluster(AutocRegimeID), data=table)
m8p

polhigh <- quantile(table$v2x_polyarchy, 0.9, na.rm=T)
pollow <- quantile(table$v2x_polyarchy, 0.1, na.rm=T)

### Store the low/high growth percentiles according to
###  4 specifications
growthhigh <- quantile(table$grgdpch, 0.9, na.rm=T)
growthlow <- quantile(table$grgdpch, 0.1, na.rm=T)

### Generate interaction terms
lowlowint <- as.numeric(pollow*growthlow)
lowhighint <- as.numeric(pollow*growthhigh)
highlowint <- as.numeric(polhigh*growthlow)
highhighint <- as.numeric(polhigh*growthhigh)

### Creates data to use to plot LL curve - Low trans, low growth
polLL <- data.frame(v2x_polyarchy=pollow,
                    lag_growth=growthlow,
                    lag_pol_growth=lowlowint,
                    lag_party=0,
                    lag_mil=0,
                    ever_collapse=1)

### Creates data to use to plot LH curve - low trans, high growth
polLH <- data.frame(v2x_polyarchy=pollow,
                    lag_growth=growthhigh,
                    lag_pol_growth=lowhighint,
                    lag_party=0,
                    lag_mil=0,
                    ever_collapse=1)

### Creates data to use to plot HL curve - high trans, low growth
polHL <- data.frame(v2x_polyarchy=polhigh,
                    lag_growth=growthlow,
                    lag_pol_growth=highlowint,
                    lag_party=0,
                    lag_mil=0,
                    ever_collapse=1)

### Creates data to use to plot HH curve - high trans, high growth
polHH <- data.frame(v2x_polyarchy=polhigh,
                    lag_growth=growthhigh,
                    lag_pol_growth=highhighint,
                    lag_party=0,
                    lag_mil=0,
                    ever_collapse=1)

# store various curves
curve.pol<-list()

### Store curves for our 4 set values
curve.pol[[1]] <- survfit(m8p, newdata=polLL)
curve.pol[[2]] <- survfit(m8p, newdata=polLH)
curve.pol[[3]] <- survfit(m8p, newdata=polHL)
curve.pol[[4]] <- survfit(m8p, newdata=polHH)

### Extract the critical times from the surfit models (should be same for each one)
time.temp.pol <- curve.pol[[1]]$time
ytemp.pol <- matrix(0, length(time.temp.pol), length(curve.pol)*3)

### Store each of these survfit functions as smoothed curves
for (i in 1:length(curve.pol)) {
  ss <- smooth.spline(curve.pol[[i]]$time, -log(curve.pol[[i]]$surv), df=6)  #smoothed cum hazard
  ssUL<- smooth.spline(curve.pol[[i]]$time, -log(curve.pol[[i]]$upper), df=6)
  ssLL<- smooth.spline(curve.pol[[i]]$time, -log(curve.pol[[i]]$lower), df=6)
  i<-((i-1)*3)+1
  ytemp.pol[,i] <- predict(ss, deriv=1)$y  #hazard functions
  ytemp.pol[,i+1] <- predict(ssUL, deriv=1)$y
  ytemp.pol[,i+2] <- predict(ssLL, deriv=1)$y
}

###########################################################################################
# Confidence Intervals on Hazard Rates for Polyarchy
# This  presents 95% confidence intervals on the generated hazard rates
###########################################################################################
# Extract coefficients, vcov matrix for polyarchy
vcov.pol<-vcov(m8p)
beta.pol<-m8p$coefficients

set.seed(8766)
nIter<-10000
diff<-rep(NA, nIter)

### Simulate 10,000 Beta sets
simBetas.pol<-rmvnorm(n = 10000, 
                      mean = beta.pol, 
                      sigma = vcov.pol)

# Generate simulations for low/high ed pol, low growth
for(i in 1:nIter){
  XiB<-as.matrix(polHL)%*%as.matrix(simBetas.pol[i,])-
    as.matrix(polLL)%*%as.matrix(simBetas.pol[i,])
  diff[i]<-XiB
}

### Upper and lower bounds for 95% CI - crosses 1 (Still crosses one here)
quantile(diff, 0.975) 
quantile(diff, 0.025) 
## Lower education --> Lower Risk

diff2<-rep(NA, nIter)

### For Low/High ed pol, High Growth
for(i in 1:nIter){
  XiB<-as.matrix(polHH)%*%as.matrix(simBetas.pol[i,])-
    as.matrix(polLH)%*%as.matrix(simBetas.pol[i,])
  diff2[i]<-XiB
}

quantile(diff2, 0.975)   
quantile(diff2, 0.025)   

###### Graphical plot of different hazard curves
pdf("Risk_of_Polyarchy.pdf", width = 8, height = 4)
par(oma=c(0,0.5,2,0.5))
layout(matrix(c(1,2,3), 1, 3, byrow = TRUE))

matplot(time.temp.pol, ytemp.pol[,1], type='l', lty = 1, col="red", 
        ylab = "Hazard Rate", xlab = "Years",
        main = "Low Democracy",
        xlim = c(7, 45), ylim = c(0, 0.25))
#matlines(time.temp.pol, ytemp.pol[,2:3], type='l', lty = 2, col="red", xlim = c(7, 45), ylim = c(0, 0.06))
matlines(time.temp.pol, ytemp.pol[,4], type='l', lty = 1, col="blue", xlim = c(7, 45), ylim = c(0, 0.06))
#matlines(time.temp.pol, ytemp.pol[,5:6], type='l', lty = 2, col="blue", xlim = c(7, 45), ylim = c(0, 0.06))
text(15, 0.03, "High Growth", cex = .8, col = "blue")
text(35, 0.03, "Low Growth", cex = .8, col = "red")

matplot(time.temp.pol, ytemp.pol[,7], type='l', lty = 1, col="red", 
        ylab = "Hazard Rate", xlab = "Years",
        main = "High Democracy",
        xlim = c(7, 45), ylim = c(0, 0.25))
#matlines(time.temp.pol, ytemp.pol[,8:9], type='l', lty = 2, col="red", xlim = c(7, 45), ylim = c(0, 0.06))
matlines(time.temp.pol, ytemp.pol[,10], type='l', lty = 1, col="blue", xlim = c(7, 45), ylim = c(0, 0.06))
#matlines(time.temp.pol, ytemp.pol[,11:12], type='l', lty = 2, col="blue", xlim = c(7, 45), ylim = c(0, 0.06))
text(32, 0.033, "High Growth", cex = .8, col = "blue")
text(32, 0.15, "Low Growth", cex = .8, col = "red")

# Plot different confidence intervals
plot(x=c(1.5,2.5), y=c(mean(diff), mean(diff2)), ylim = c(log(0.95), log(150)), xlim = c(1.2,2.8),
     main = c("Low vs. High Democracy"), axes=F,
     xlab = "",  ylab = "Hazard Ratio", lwd = 1)

ticks <- c(1, 2, 10, 100, 200)
labels <- sapply(ticks, function(i) as.expression(i))
axis(2, at = log(ticks), labels=labels)

axis(1, at = c(1.5,2.5), labels = c("Low Growth", "High Growth"))
segments(x0 = c(1.5, 2.5),
         y0 = c(quantile(diff, 0.025),quantile(diff2, 0.025)),
         y1 = c(quantile(diff, 0.975),quantile(diff2, 0.975)), lwd = 1)
abline(h = 0, lty = 3, lwd = 2)
title("Hazard Ratios - Level of Democracy", outer=T, cex = 2, font = 2)
dev.off()

####################################################################################
####################################################################################
stargazer( m8k, m8d, m8j, m8p)

####################################################################################
####################################################################################